********************************************************************************
* matching_worker_diff_market.do
* Purpose: ROBUSTNESS CHECK - Worker PSM requiring matched workers to work at
*          firms in DIFFERENT geographic markets from the treated firm.
*
* Complements matching_firm_diff_market.do at the worker level.
* Workers at control firms that share a CZ x NAICS market with the treated
* firm are excluded from the control pool, ensuring the control worker is
* not exposed to the same M&A labor market shock.
*
* Output: $data/worker_matched_diff_market.dta
********************************************************************************
* First and last years over which the matching loop and the list-append loop
* below iterate (each loop year is used as year_prior in the matching step).
global start_year = 2005
global end_year = 2016

**# matching
* For every year y: load the worker file, attach the matched-firm list, apply
* the sample restrictions, and run 1:1 nearest-neighbour matching without
* replacement inside exact cells. One list of matched workers is saved per year.
* NOTE(review): the yearly output name worker_matched_list_`y' carries no
* "diff_market" tag - confirm it cannot overwrite the files of another do-file.
forvalues y = $start_year/$end_year {

	* -use- only accepts -clear- (not -replace-) to discard the data in memory
	use $data/worker_`y', clear
	
	gen year_prior	=	year
	* merge in matched firms; joinby keeps only workers whose
	* (entid_syn, year_prior) appears in the matched-firm list
	joinby entid_syn year_prior using $data/firm_matched_diff_market_list
	
	
	** Keep worker if earn more than ~4k CAD in real wage **
	egen 	total_wage 		= sum(t4earn), by(casenum2019)
	gen		real_total_wage = total_wage/CPI_base_2011
	drop if real_total_wage < 3900 | mi(real_total_wage)
	
	* drop moonlighters
	drop if moonlighter == 1

	* drop workers with less than 4 years of tenure and gaps in their employment
	drop 	if max_gap > 1 | mi(max_gap)					// drop workers with gaps in their employment
	* NOTE(review): a missing last_year_at_firm satisfies "<=" (missing sorts
	* above every number), so such workers are kept - confirm this is intended.
	gen 	present_at_treated = (`y' + 1 <= last_year_at_firm)
	drop 	if present_at_treated == 0 						// drop workers that leave firms at t = 0
	drop 	if tenure < 4									// drop workers with less than 4 years of tenure
	
	drop if mi(t1_age_recorded) | mi(t1_sex_recorded) | mi(naics2) | mi(OPAddressProvince)
	
	** worker age: 5 years bin **
	gen bin_age	=	int(( t1_age_recorded - 17)/5)
	
	* create interactions between bins
	* workers must be matched within the same cells
	* (bin_* picks up bin_age and any other bin_ variable already in the data)
	egen cell = group(naics2 OPAddressProvince t1_sex_recorded bin_*)
	drop if cell == .
	
	* estimate pscore using the eligible sample (linear model in age squared)
	reg treated c.t1_age_recorded#c.t1_age_recorded
	
	predict pscore
	
	* shift the score by 10 per cell: with caliper(1) below, a match across
	* cells is ruled out as long as fitted values span less than 9 units
	replace pscore = pscore + 10.0*cell
	
	** placeholder outcome: psmatch2 is given an outcome variable, but it is
	** not kept in the saved list
	gen outcome = rnormal()
	
	*** Fix the sorting of data (psmatch2 results depend on the sort order)
	* NOTE(review): if casenum2019 is not unique at this point, ties are
	* ordered arbitrarily and the matches may differ between runs.
	gsort casenum2019
	
	* matching
	psmatch2 treated, outcome(outcome) pscore(pscore) caliper(1) n(1) noreplacement
	sum outcome

	* pair key = psmatch2 _id of the control: controls use their own _id,
	* treated workers use _n1 (the _id of their nearest neighbour)
	gen 	id = _id if treated==0 & _weight==1
	replace id = _n1 if treated==1 & _weight==1
	
	* replace the existing pairid (presumably the firm-level pair id brought in
	* by the joinby above) with a worker-level pair id for this year
	drop pairid
	egen pairid = group(id)
	gsort pairid
	
	* unmatched workers have no id
	drop if mi(id)
	keep casenum2019 entid_syn treated pairid year_prior
	
	save $data/worker_matched_list_`y',  replace 
}

* Stack the yearly matched-worker lists into a single list file
drop _all
forvalues y = $start_year/$end_year {
	* progress marker in the log
	display "`y'"
	local yearly_list "$data/worker_matched_list_`y'"
	append using `yearly_list', force keep(casenum2019 entid_syn treated pairid year_prior)
}

compress
save $data/worker_matched_diff_market_list, replace

**# matched worker panel
* For each year 2001-2017: load that year's worker file, keep only workers on
* the matched list, and reduce the data to one record per
* (casenum2019, year_prior, pairid). One file is saved per year.
forvalues y = 2001/2017 {

	use $data/worker_`y', clear
	
	* set the current-year employer aside; the joinby below brings in the
	* entid_syn stored in the matched list
	rename entid_syn temp_id
	
	* keep matched treated and control workers
	* (one copy of each job record per matched-list entry of the worker)
	joinby casenum2019 	using $data/worker_matched_diff_market_list, unmatched(none)
	* NOTE(review): keepusing(entid_syn) adds no variables and keep(1 3) retains
	* every master row, so this merge appears to leave the data unchanged apart
	* from requiring entid_syn to be unique in first_mna - confirm the intent.
	merge m:1 entid_syn using $data/first_mna, keep(1 3) keepusing(entid_syn) nogen

	** flag records where the current employer is the firm on the matched list **
	gen 	same_firm = (temp_id == entid_syn)
	* restore entid_syn to mean the current-year employer
	drop 	entid_syn
	rename	temp_id entid_syn
	
	* earnings summed over all of the worker's records within the pair
	egen total_wage = sum(t4earn), by(casenum2019 pairid year_prior)
	
	* order so that the record at the matched-list firm comes first, then the
	* highest-earning record; duplicates drop keeps the first record per group
	gsort casenum2019 year_prior -same_firm -t4earn entid_syn
	duplicates drop casenum2019 year_prior pairid, force
	
	drop same_firm
	
	compress
	save $data/worker_matched_`y', replace
}

* Stack the yearly matched panels, deleting each yearly file once it is read
drop _all
forvalues y = 2001/2017 {
	local yearly_panel "$data/worker_matched_`y'"
	append using `yearly_panel', force
	erase `yearly_panel'.dta
}

* intermediate copy of the stacked panel, before derived variables are added
save $data/worker_matched_diff_market_intermid, replace

**# panel variables
gsort pairid year_prior -treated year

** ID variables
* id identifies a worker within a matching year (worker x year_prior)
egen worker_id	= group(casenum2019)
egen firm_id 	= group(entid_syn)
egen id 		= group(worker_id year_prior)

**# Time Variables
* event time: t = -1 is the matching year (year == year_prior);
* everything beyond +/-5 is pooled into the end bins -6 and 6
gen 	t = year - (year_prior + 1)
replace t = 6 	if t >  5 	& ~mi(t)
replace t = -6 	if t < -5 	& ~mi(t)
tab t
* kept so that the local `ts' (observed levels of t) is still defined
levelsof t, local(ts)

* Event-time dummies: ds_1 ... ds_13 flag t = -6 ... 6. They are built
* explicitly rather than with -tab t, gen()- so that ds_`i' always means
* t = `i' - 7 even if some event time is absent from the data (tab would
* then number the dummies consecutively and shift every label).
forvalues i = 1/13 {
	local temp = `i' - 7
	gen byte ds_`i' = (t == `temp') if ~mi(t)
	label variable ds_`i' "`temp'"
}
* ds_6 is t = -1: set to zero so it serves as the omitted reference period
replace ds_6 = 0

**# Sector
* rebuild the 2-digit sector from naics and pool related sector codes
destring naics, replace
drop naics2
generate naics2 = int(naics/100)
replace naics2 = 31 if inlist(naics2, 32, 33)
replace naics2 = 44 if naics2 == 45
replace naics2 = 48 if naics2 == 49
replace naics2 = 54 if inlist(naics2, 56, 61, 62)

** sector and firm observed in the matching year (year == year_prior)
generate naics2_event 	= naics2	if year == year_prior & treated == 1
generate naics_event	= naics		if year == year_prior
generate firm_event 	= firm_id 	if year == year_prior
* spread the matching-year values over all rows of the same id
gegen naics_tmp 	= firstnm(naics_event), by(id)
gegen firmid_tmp 	= firstnm(firm_event),  by(id)
* while the worker is at the matching-year firm, up to year_prior + 1,
* use that firm's matching-year naics
replace naics 		= naics_tmp if year <= year_prior + 1 & firm_id == firmid_tmp

** merge in mna characteristics
merge	m:1 entid_syn 	using 	$data/first_mna, keep(1 3) keepusing(DEAL Acquirer merger) nogen
* for treated workers keep DEAL and Acquirer only on the t = -1 row
foreach v of varlist DEAL Acquirer {
	replace `v' = . if treated == 1 & t != -1
}

**# Transition Dummies
sort id year

** Moved
* years elapsed since the previous row of the same id (missing on the first row)
by id: 	generate year_diff	= year - year[_n-1]
* moved = 1 when the firm differs from the previous row or at least one year
* was skipped; the first row of each id is 0
by id: 	generate moved 		= !missing(year_diff) & (year_diff >= 2 | firm_id != firm_id[_n-1])

**# pair-level variables
* treated rows first within each pair, then chronological
gsort pairid year_prior -treated year

* first non-missing Acquirer within the pair
gegen matched_acq           = firstnm(Acquirer), 			by(pairid year_prior)

compress
save $data/worker_matched_diff_market, replace